1 package org.apache.lucene.search.postingshighlight;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import org.apache.lucene.analysis.Analyzer;
21 import org.apache.lucene.analysis.MockAnalyzer;
22 import org.apache.lucene.analysis.MockTokenizer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.document.FieldType;
26 import org.apache.lucene.document.StoredField;
27 import org.apache.lucene.document.StringField;
28 import org.apache.lucene.document.TextField;
29 import org.apache.lucene.index.DirectoryReader;
30 import org.apache.lucene.index.IndexOptions;
31 import org.apache.lucene.index.IndexReader;
32 import org.apache.lucene.index.IndexWriterConfig;
33 import org.apache.lucene.index.RandomIndexWriter;
34 import org.apache.lucene.index.Term;
35 import org.apache.lucene.queries.CustomScoreQuery;
36 import org.apache.lucene.search.BooleanClause;
37 import org.apache.lucene.search.BooleanQuery;
38 import org.apache.lucene.search.IndexSearcher;
39 import org.apache.lucene.search.MatchAllDocsQuery;
40 import org.apache.lucene.search.PhraseQuery;
41 import org.apache.lucene.search.Query;
42 import org.apache.lucene.search.ScoreDoc;
43 import org.apache.lucene.search.Sort;
44 import org.apache.lucene.search.TermQuery;
45 import org.apache.lucene.search.TopDocs;
46 import org.apache.lucene.store.Directory;
47 import org.apache.lucene.util.LuceneTestCase;
48
49 import java.io.BufferedReader;
50 import java.io.IOException;
51 import java.io.InputStreamReader;
52 import java.nio.charset.StandardCharsets;
53 import java.text.BreakIterator;
54 import java.util.Arrays;
55 import java.util.Map;
56
57 public class TestPostingsHighlighter extends LuceneTestCase {
58
59 public void testBasics() throws Exception {
60 Directory dir = newDirectory();
61 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
62 iwc.setMergePolicy(newLogMergePolicy());
63 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
64
65 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
66 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
67 Field body = new Field("body", "", offsetsType);
68 Document doc = new Document();
69 doc.add(body);
70
71 body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
72 iw.addDocument(doc);
73 body.setStringValue("Highlighting the first term. Hope it works.");
74 iw.addDocument(doc);
75
76 IndexReader ir = iw.getReader();
77 iw.close();
78
79 IndexSearcher searcher = newSearcher(ir);
80 PostingsHighlighter highlighter = new PostingsHighlighter();
81 Query query = new TermQuery(new Term("body", "highlighting"));
82 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
83 assertEquals(2, topDocs.totalHits);
84 String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
85 assertEquals(2, snippets.length);
86 assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
87 assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
88
89 ir.close();
90 dir.close();
91 }
92
93 public void testFormatWithMatchExceedingContentLength2() throws Exception {
94
95 String bodyText = "123 TEST 01234 TEST";
96
97 String[] snippets = formatWithMatchExceedingContentLength(bodyText);
98
99 assertEquals(1, snippets.length);
100 assertEquals("123 <b>TEST</b> 01234 TE", snippets[0]);
101 }
102
103 public void testFormatWithMatchExceedingContentLength3() throws Exception {
104
105 String bodyText = "123 5678 01234 TEST TEST";
106
107 String[] snippets = formatWithMatchExceedingContentLength(bodyText);
108
109 assertEquals(1, snippets.length);
110 assertEquals("123 5678 01234 TE", snippets[0]);
111 }
112
113 public void testFormatWithMatchExceedingContentLength() throws Exception {
114
115 String bodyText = "123 5678 01234 TEST";
116
117 String[] snippets = formatWithMatchExceedingContentLength(bodyText);
118
119 assertEquals(1, snippets.length);
120
121 assertEquals("123 5678 01234 TE", snippets[0]);
122 }
123
124 private String[] formatWithMatchExceedingContentLength(String bodyText) throws IOException {
125
126 int maxLength = 17;
127
128 final Analyzer analyzer = new MockAnalyzer(random());
129
130 Directory dir = newDirectory();
131 IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
132 iwc.setMergePolicy(newLogMergePolicy());
133 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
134
135 final FieldType fieldType = new FieldType(TextField.TYPE_STORED);
136 fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
137 final Field body = new Field("body", bodyText, fieldType);
138
139 Document doc = new Document();
140 doc.add(body);
141
142 iw.addDocument(doc);
143
144 IndexReader ir = iw.getReader();
145 iw.close();
146
147 IndexSearcher searcher = newSearcher(ir);
148
149 Query query = new TermQuery(new Term("body", "test"));
150
151 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
152 assertEquals(1, topDocs.totalHits);
153
154 PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
155 String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
156
157
158 ir.close();
159 dir.close();
160 return snippets;
161 }
162
163
164 public void testHighlightLastWord() throws Exception {
165 Directory dir = newDirectory();
166 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
167 iwc.setMergePolicy(newLogMergePolicy());
168 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
169
170 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
171 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
172 Field body = new Field("body", "", offsetsType);
173 Document doc = new Document();
174 doc.add(body);
175
176 body.setStringValue("This is a test");
177 iw.addDocument(doc);
178
179 IndexReader ir = iw.getReader();
180 iw.close();
181
182 IndexSearcher searcher = newSearcher(ir);
183 PostingsHighlighter highlighter = new PostingsHighlighter();
184 Query query = new TermQuery(new Term("body", "test"));
185 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
186 assertEquals(1, topDocs.totalHits);
187 String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
188 assertEquals(1, snippets.length);
189 assertEquals("This is a <b>test</b>", snippets[0]);
190
191 ir.close();
192 dir.close();
193 }
194
195
196 public void testOneSentence() throws Exception {
197 Directory dir = newDirectory();
198
199 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
200 iwc.setMergePolicy(newLogMergePolicy());
201 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
202
203 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
204 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
205 Field body = new Field("body", "", offsetsType);
206 Document doc = new Document();
207 doc.add(body);
208
209 body.setStringValue("This is a test.");
210 iw.addDocument(doc);
211 body.setStringValue("Test a one sentence document.");
212 iw.addDocument(doc);
213
214 IndexReader ir = iw.getReader();
215 iw.close();
216
217 IndexSearcher searcher = newSearcher(ir);
218 PostingsHighlighter highlighter = new PostingsHighlighter();
219 Query query = new TermQuery(new Term("body", "test"));
220 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
221 assertEquals(2, topDocs.totalHits);
222 String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
223 assertEquals(2, snippets.length);
224 assertEquals("This is a <b>test</b>.", snippets[0]);
225 assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
226
227 ir.close();
228 dir.close();
229 }
230
231
232 public void testMaxLengthWithMultivalue() throws Exception {
233 Directory dir = newDirectory();
234
235 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
236 iwc.setMergePolicy(newLogMergePolicy());
237 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
238
239 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
240 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
241 Document doc = new Document();
242
243 for(int i = 0; i < 3 ; i++) {
244 Field body = new Field("body", "", offsetsType);
245 body.setStringValue("This is a multivalued field");
246 doc.add(body);
247 }
248
249 iw.addDocument(doc);
250
251 IndexReader ir = iw.getReader();
252 iw.close();
253
254 IndexSearcher searcher = newSearcher(ir);
255 PostingsHighlighter highlighter = new PostingsHighlighter(40);
256 Query query = new TermQuery(new Term("body", "field"));
257 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
258 assertEquals(1, topDocs.totalHits);
259 String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
260 assertEquals(1, snippets.length);
261 assertTrue("Snippet should have maximum 40 characters plus the pre and post tags",
262 snippets[0].length() == (40 + "<b></b>".length()));
263
264 ir.close();
265 dir.close();
266 }
267
268 public void testMultipleFields() throws Exception {
269 Directory dir = newDirectory();
270 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
271 iwc.setMergePolicy(newLogMergePolicy());
272 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
273
274 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
275 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
276 Field body = new Field("body", "", offsetsType);
277 Field title = new Field("title", "", offsetsType);
278 Document doc = new Document();
279 doc.add(body);
280 doc.add(title);
281
282 body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
283 title.setStringValue("I am hoping for the best.");
284 iw.addDocument(doc);
285 body.setStringValue("Highlighting the first term. Hope it works.");
286 title.setStringValue("But best may not be good enough.");
287 iw.addDocument(doc);
288
289 IndexReader ir = iw.getReader();
290 iw.close();
291
292 IndexSearcher searcher = newSearcher(ir);
293 PostingsHighlighter highlighter = new PostingsHighlighter();
294 BooleanQuery.Builder query = new BooleanQuery.Builder();
295 query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
296 query.add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD);
297 TopDocs topDocs = searcher.search(query.build(), 10, Sort.INDEXORDER);
298 assertEquals(2, topDocs.totalHits);
299 Map<String,String[]> snippets = highlighter.highlightFields(new String [] { "body", "title" }, query.build(), searcher, topDocs);
300 assertEquals(2, snippets.size());
301 assertEquals("Just a test <b>highlighting</b> from postings. ", snippets.get("body")[0]);
302 assertEquals("<b>Highlighting</b> the first term. ", snippets.get("body")[1]);
303 assertEquals("I am hoping for the <b>best</b>.", snippets.get("title")[0]);
304 assertEquals("But <b>best</b> may not be good enough.", snippets.get("title")[1]);
305 ir.close();
306 dir.close();
307 }
308
309 public void testMultipleTerms() throws Exception {
310 Directory dir = newDirectory();
311 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
312 iwc.setMergePolicy(newLogMergePolicy());
313 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
314
315 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
316 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
317 Field body = new Field("body", "", offsetsType);
318 Document doc = new Document();
319 doc.add(body);
320
321 body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
322 iw.addDocument(doc);
323 body.setStringValue("Highlighting the first term. Hope it works.");
324 iw.addDocument(doc);
325
326 IndexReader ir = iw.getReader();
327 iw.close();
328
329 IndexSearcher searcher = newSearcher(ir);
330 PostingsHighlighter highlighter = new PostingsHighlighter();
331 BooleanQuery.Builder query = new BooleanQuery.Builder();
332 query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
333 query.add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD);
334 query.add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD);
335 TopDocs topDocs = searcher.search(query.build(), 10, Sort.INDEXORDER);
336 assertEquals(2, topDocs.totalHits);
337 String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs);
338 assertEquals(2, snippets.length);
339 assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
340 assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
341
342 ir.close();
343 dir.close();
344 }
345
346 public void testMultiplePassages() throws Exception {
347 Directory dir = newDirectory();
348 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
349 iwc.setMergePolicy(newLogMergePolicy());
350 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
351
352 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
353 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
354 Field body = new Field("body", "", offsetsType);
355 Document doc = new Document();
356 doc.add(body);
357
358 body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
359 iw.addDocument(doc);
360 body.setStringValue("This test is another test. Not a good sentence. Test test test test.");
361 iw.addDocument(doc);
362
363 IndexReader ir = iw.getReader();
364 iw.close();
365
366 IndexSearcher searcher = newSearcher(ir);
367 PostingsHighlighter highlighter = new PostingsHighlighter();
368 Query query = new TermQuery(new Term("body", "test"));
369 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
370 assertEquals(2, topDocs.totalHits);
371 String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
372 assertEquals(2, snippets.length);
373 assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", snippets[0]);
374 assertEquals("This <b>test</b> is another <b>test</b>. ... <b>Test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[1]);
375
376 ir.close();
377 dir.close();
378 }
379
380 public void testUserFailedToIndexOffsets() throws Exception {
381 Directory dir = newDirectory();
382 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
383 iwc.setMergePolicy(newLogMergePolicy());
384 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
385
386 FieldType positionsType = new FieldType(TextField.TYPE_STORED);
387 positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
388 Field body = new Field("body", "", positionsType);
389 Field title = new StringField("title", "", Field.Store.YES);
390 Document doc = new Document();
391 doc.add(body);
392 doc.add(title);
393
394 body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
395 title.setStringValue("test");
396 iw.addDocument(doc);
397 body.setStringValue("This test is another test. Not a good sentence. Test test test test.");
398 title.setStringValue("test");
399 iw.addDocument(doc);
400
401 IndexReader ir = iw.getReader();
402 iw.close();
403
404 IndexSearcher searcher = newSearcher(ir);
405 PostingsHighlighter highlighter = new PostingsHighlighter();
406 Query query = new TermQuery(new Term("body", "test"));
407 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
408 assertEquals(2, topDocs.totalHits);
409 try {
410 highlighter.highlight("body", query, searcher, topDocs, 2);
411 fail("did not hit expected exception");
412 } catch (IllegalArgumentException iae) {
413
414 }
415
416 try {
417 highlighter.highlight("title", new TermQuery(new Term("title", "test")), searcher, topDocs, 2);
418 fail("did not hit expected exception");
419 } catch (IllegalArgumentException iae) {
420
421 }
422 ir.close();
423 dir.close();
424 }
425
426 public void testBuddhism() throws Exception {
427 String text = "This eight-volume set brings together seminal papers in Buddhist studies from a vast " +
428 "range of academic disciplines published over the last forty years. With a new introduction " +
429 "by the editor, this collection is a unique and unrivalled research resource for both " +
430 "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in " +
431 "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine " +
432 "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma " +
433 "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric " +
434 "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and " +
435 "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";
436 Directory dir = newDirectory();
437 Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
438 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
439
440 FieldType positionsType = new FieldType(TextField.TYPE_STORED);
441 positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
442 Field body = new Field("body", text, positionsType);
443 Document document = new Document();
444 document.add(body);
445 iw.addDocument(document);
446 IndexReader ir = iw.getReader();
447 iw.close();
448 IndexSearcher searcher = newSearcher(ir);
449 PhraseQuery query = new PhraseQuery("body", "buddhist", "origins");
450 TopDocs topDocs = searcher.search(query, 10);
451 assertEquals(1, topDocs.totalHits);
452 PostingsHighlighter highlighter = new PostingsHighlighter();
453 String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
454 assertEquals(1, snippets.length);
455 assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
456 ir.close();
457 dir.close();
458 }
459
460 public void testCuriousGeorge() throws Exception {
461 String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " +
462 "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
463 "popular primate and painted in the original watercolor and charcoal style. " +
464 "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
465 Directory dir = newDirectory();
466 Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
467 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
468 FieldType positionsType = new FieldType(TextField.TYPE_STORED);
469 positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
470 Field body = new Field("body", text, positionsType);
471 Document document = new Document();
472 document.add(body);
473 iw.addDocument(document);
474 IndexReader ir = iw.getReader();
475 iw.close();
476 IndexSearcher searcher = newSearcher(ir);
477 PhraseQuery query = new PhraseQuery("body", "curious", "george");
478 TopDocs topDocs = searcher.search(query, 10);
479 assertEquals(1, topDocs.totalHits);
480 PostingsHighlighter highlighter = new PostingsHighlighter();
481 String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
482 assertEquals(1, snippets.length);
483 assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
484 ir.close();
485 dir.close();
486 }
487
488 public void testCambridgeMA() throws Exception {
489 BufferedReader r = new BufferedReader(new InputStreamReader(
490 this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8));
491 String text = r.readLine();
492 r.close();
493 Directory dir = newDirectory();
494 Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
495 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
496 FieldType positionsType = new FieldType(TextField.TYPE_STORED);
497 positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
498 Field body = new Field("body", text, positionsType);
499 Document document = new Document();
500 document.add(body);
501 iw.addDocument(document);
502 IndexReader ir = iw.getReader();
503 iw.close();
504 IndexSearcher searcher = newSearcher(ir);
505 BooleanQuery.Builder query = new BooleanQuery.Builder();
506 query.add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD);
507 query.add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD);
508 query.add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD);
509 TopDocs topDocs = searcher.search(query.build(), 10);
510 assertEquals(1, topDocs.totalHits);
511 PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
512 String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs, 2);
513 assertEquals(1, snippets.length);
514 assertTrue(snippets[0].contains("<b>Square</b>"));
515 assertTrue(snippets[0].contains("<b>Porter</b>"));
516 ir.close();
517 dir.close();
518 }
519
520 public void testPassageRanking() throws Exception {
521 Directory dir = newDirectory();
522 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
523 iwc.setMergePolicy(newLogMergePolicy());
524 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
525
526 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
527 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
528 Field body = new Field("body", "", offsetsType);
529 Document doc = new Document();
530 doc.add(body);
531
532 body.setStringValue("This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.");
533 iw.addDocument(doc);
534
535 IndexReader ir = iw.getReader();
536 iw.close();
537
538 IndexSearcher searcher = newSearcher(ir);
539 PostingsHighlighter highlighter = new PostingsHighlighter();
540 Query query = new TermQuery(new Term("body", "test"));
541 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
542 assertEquals(1, topDocs.totalHits);
543 String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
544 assertEquals(1, snippets.length);
545 assertEquals("This is a <b>test</b>. ... Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
546
547 ir.close();
548 dir.close();
549 }
550
551 public void testBooleanMustNot() throws Exception {
552 Directory dir = newDirectory();
553 Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
554 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
555 FieldType positionsType = new FieldType(TextField.TYPE_STORED);
556 positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
557 Field body = new Field("body", "This sentence has both terms. This sentence has only terms.", positionsType);
558 Document document = new Document();
559 document.add(body);
560 iw.addDocument(document);
561 IndexReader ir = iw.getReader();
562 iw.close();
563 IndexSearcher searcher = newSearcher(ir);
564 BooleanQuery.Builder query = new BooleanQuery.Builder();
565 query.add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD);
566 BooleanQuery.Builder query2 = new BooleanQuery.Builder();
567 query.add(query2.build(), BooleanClause.Occur.SHOULD);
568 query2.add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT);
569 TopDocs topDocs = searcher.search(query.build(), 10);
570 assertEquals(1, topDocs.totalHits);
571 PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
572 String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs, 2);
573 assertEquals(1, snippets.length);
574 assertFalse(snippets[0].contains("<b>both</b>"));
575 ir.close();
576 dir.close();
577 }
578
579 public void testHighlightAllText() throws Exception {
580 Directory dir = newDirectory();
581 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
582 iwc.setMergePolicy(newLogMergePolicy());
583 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
584
585 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
586 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
587 Field body = new Field("body", "", offsetsType);
588 Document doc = new Document();
589 doc.add(body);
590
591 body.setStringValue("This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.");
592 iw.addDocument(doc);
593
594 IndexReader ir = iw.getReader();
595 iw.close();
596
597 IndexSearcher searcher = newSearcher(ir);
598 PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
599 @Override
600 protected BreakIterator getBreakIterator(String field) {
601 return new WholeBreakIterator();
602 }
603 };
604 Query query = new TermQuery(new Term("body", "test"));
605 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
606 assertEquals(1, topDocs.totalHits);
607 String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
608 assertEquals(1, snippets.length);
609 assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
610
611 ir.close();
612 dir.close();
613 }
614
615 public void testSpecificDocIDs() throws Exception {
616 Directory dir = newDirectory();
617 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
618 iwc.setMergePolicy(newLogMergePolicy());
619 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
620
621 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
622 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
623 Field body = new Field("body", "", offsetsType);
624 Document doc = new Document();
625 doc.add(body);
626
627 body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
628 iw.addDocument(doc);
629 body.setStringValue("Highlighting the first term. Hope it works.");
630 iw.addDocument(doc);
631
632 IndexReader ir = iw.getReader();
633 iw.close();
634
635 IndexSearcher searcher = newSearcher(ir);
636 PostingsHighlighter highlighter = new PostingsHighlighter();
637 Query query = new TermQuery(new Term("body", "highlighting"));
638 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
639 assertEquals(2, topDocs.totalHits);
640 ScoreDoc[] hits = topDocs.scoreDocs;
641 int[] docIDs = new int[2];
642 docIDs[0] = hits[0].doc;
643 docIDs[1] = hits[1].doc;
644 String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 1 }).get("body");
645 assertEquals(2, snippets.length);
646 assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
647 assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
648
649 ir.close();
650 dir.close();
651 }
652
653 public void testCustomFieldValueSource() throws Exception {
654 Directory dir = newDirectory();
655 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
656 iwc.setMergePolicy(newLogMergePolicy());
657 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
658
659 Document doc = new Document();
660
661 FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
662 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
663 final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
664 Field body = new Field("body", text, offsetsType);
665 doc.add(body);
666 iw.addDocument(doc);
667
668 IndexReader ir = iw.getReader();
669 iw.close();
670
671 IndexSearcher searcher = newSearcher(ir);
672
673 PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
674 @Override
675 protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
676 assert fields.length == 1;
677 assert docids.length == 1;
678 String[][] contents = new String[1][1];
679 contents[0][0] = text;
680 return contents;
681 }
682
683 @Override
684 protected BreakIterator getBreakIterator(String field) {
685 return new WholeBreakIterator();
686 }
687 };
688
689 Query query = new TermQuery(new Term("body", "test"));
690 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
691 assertEquals(1, topDocs.totalHits);
692 String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
693 assertEquals(1, snippets.length);
694 assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
695
696 ir.close();
697 dir.close();
698 }
699
700
701
702 public void testEmptyHighlights() throws Exception {
703 Directory dir = newDirectory();
704 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
705 iwc.setMergePolicy(newLogMergePolicy());
706 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
707
708 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
709 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
710 Document doc = new Document();
711
712 Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
713 doc.add(body);
714 iw.addDocument(doc);
715
716 IndexReader ir = iw.getReader();
717 iw.close();
718
719 IndexSearcher searcher = newSearcher(ir);
720 PostingsHighlighter highlighter = new PostingsHighlighter();
721 Query query = new TermQuery(new Term("body", "highlighting"));
722 int[] docIDs = new int[] {0};
723 String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
724 assertEquals(1, snippets.length);
725 assertEquals("test this is. another sentence this test has. ", snippets[0]);
726
727 ir.close();
728 dir.close();
729 }
730
731
732
733 public void testCustomEmptyHighlights() throws Exception {
734 Directory dir = newDirectory();
735 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
736 iwc.setMergePolicy(newLogMergePolicy());
737 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
738
739 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
740 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
741 Document doc = new Document();
742
743 Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
744 doc.add(body);
745 iw.addDocument(doc);
746
747 IndexReader ir = iw.getReader();
748 iw.close();
749
750 IndexSearcher searcher = newSearcher(ir);
751 PostingsHighlighter highlighter = new PostingsHighlighter() {
752 @Override
753 public Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
754 return new Passage[0];
755 }
756 };
757 Query query = new TermQuery(new Term("body", "highlighting"));
758 int[] docIDs = new int[] {0};
759 String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
760 assertEquals(1, snippets.length);
761 assertNull(snippets[0]);
762
763 ir.close();
764 dir.close();
765 }
766
767
768
769 public void testEmptyHighlightsWhole() throws Exception {
770 Directory dir = newDirectory();
771 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
772 iwc.setMergePolicy(newLogMergePolicy());
773 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
774
775 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
776 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
777 Document doc = new Document();
778
779 Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
780 doc.add(body);
781 iw.addDocument(doc);
782
783 IndexReader ir = iw.getReader();
784 iw.close();
785
786 IndexSearcher searcher = newSearcher(ir);
787 PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
788 @Override
789 protected BreakIterator getBreakIterator(String field) {
790 return new WholeBreakIterator();
791 }
792 };
793 Query query = new TermQuery(new Term("body", "highlighting"));
794 int[] docIDs = new int[] {0};
795 String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
796 assertEquals(1, snippets.length);
797 assertEquals("test this is. another sentence this test has. far away is that planet.", snippets[0]);
798
799 ir.close();
800 dir.close();
801 }
802
803
804
805 public void testFieldIsMissing() throws Exception {
806 Directory dir = newDirectory();
807 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
808 iwc.setMergePolicy(newLogMergePolicy());
809 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
810
811 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
812 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
813 Document doc = new Document();
814
815 Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
816 doc.add(body);
817 iw.addDocument(doc);
818
819 IndexReader ir = iw.getReader();
820 iw.close();
821
822 IndexSearcher searcher = newSearcher(ir);
823 PostingsHighlighter highlighter = new PostingsHighlighter();
824 Query query = new TermQuery(new Term("bogus", "highlighting"));
825 int[] docIDs = new int[] {0};
826 String snippets[] = highlighter.highlightFields(new String[] {"bogus"}, query, searcher, docIDs, new int[] { 2 }).get("bogus");
827 assertEquals(1, snippets.length);
828 assertNull(snippets[0]);
829
830 ir.close();
831 dir.close();
832 }
833
834 public void testFieldIsJustSpace() throws Exception {
835 Directory dir = newDirectory();
836 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
837 iwc.setMergePolicy(newLogMergePolicy());
838 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
839
840 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
841 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
842
843 Document doc = new Document();
844 doc.add(new Field("body", " ", offsetsType));
845 doc.add(new Field("id", "id", offsetsType));
846 iw.addDocument(doc);
847
848 doc = new Document();
849 doc.add(new Field("body", "something", offsetsType));
850 iw.addDocument(doc);
851
852 IndexReader ir = iw.getReader();
853 iw.close();
854
855 IndexSearcher searcher = newSearcher(ir);
856 PostingsHighlighter highlighter = new PostingsHighlighter();
857 int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
858
859 Query query = new TermQuery(new Term("body", "highlighting"));
860 int[] docIDs = new int[1];
861 docIDs[0] = docID;
862 String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
863 assertEquals(1, snippets.length);
864 assertEquals(" ", snippets[0]);
865
866 ir.close();
867 dir.close();
868 }
869
870 public void testFieldIsEmptyString() throws Exception {
871 Directory dir = newDirectory();
872 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
873 iwc.setMergePolicy(newLogMergePolicy());
874 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
875
876 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
877 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
878
879 Document doc = new Document();
880 doc.add(new Field("body", "", offsetsType));
881 doc.add(new Field("id", "id", offsetsType));
882 iw.addDocument(doc);
883
884 doc = new Document();
885 doc.add(new Field("body", "something", offsetsType));
886 iw.addDocument(doc);
887
888 IndexReader ir = iw.getReader();
889 iw.close();
890
891 IndexSearcher searcher = newSearcher(ir);
892 PostingsHighlighter highlighter = new PostingsHighlighter();
893 int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
894
895 Query query = new TermQuery(new Term("body", "highlighting"));
896 int[] docIDs = new int[1];
897 docIDs[0] = docID;
898 String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
899 assertEquals(1, snippets.length);
900 assertNull(snippets[0]);
901
902 ir.close();
903 dir.close();
904 }
905
906 public void testMultipleDocs() throws Exception {
907 Directory dir = newDirectory();
908 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
909 iwc.setMergePolicy(newLogMergePolicy());
910 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
911
912 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
913 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
914
915 int numDocs = atLeast(100);
916 for(int i=0;i<numDocs;i++) {
917 Document doc = new Document();
918 String content = "the answer is " + i;
919 if ((i & 1) == 0) {
920 content += " some more terms";
921 }
922 doc.add(new Field("body", content, offsetsType));
923 doc.add(newStringField("id", ""+i, Field.Store.YES));
924 iw.addDocument(doc);
925
926 if (random().nextInt(10) == 2) {
927 iw.commit();
928 }
929 }
930
931 IndexReader ir = iw.getReader();
932 iw.close();
933
934 IndexSearcher searcher = newSearcher(ir);
935 PostingsHighlighter highlighter = new PostingsHighlighter();
936 Query query = new TermQuery(new Term("body", "answer"));
937 TopDocs hits = searcher.search(query, numDocs);
938 assertEquals(numDocs, hits.totalHits);
939
940 String snippets[] = highlighter.highlight("body", query, searcher, hits);
941 assertEquals(numDocs, snippets.length);
942 for(int hit=0;hit<numDocs;hit++) {
943 Document doc = searcher.doc(hits.scoreDocs[hit].doc);
944 int id = Integer.parseInt(doc.get("id"));
945 String expected = "the <b>answer</b> is " + id;
946 if ((id & 1) == 0) {
947 expected += " some more terms";
948 }
949 assertEquals(expected, snippets[hit]);
950 }
951
952 ir.close();
953 dir.close();
954 }
955
956 public void testMultipleSnippetSizes() throws Exception {
957 Directory dir = newDirectory();
958 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
959 iwc.setMergePolicy(newLogMergePolicy());
960 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
961
962 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
963 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
964 Field body = new Field("body", "", offsetsType);
965 Field title = new Field("title", "", offsetsType);
966 Document doc = new Document();
967 doc.add(body);
968 doc.add(title);
969
970 body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
971 title.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
972 iw.addDocument(doc);
973
974 IndexReader ir = iw.getReader();
975 iw.close();
976
977 IndexSearcher searcher = newSearcher(ir);
978 PostingsHighlighter highlighter = new PostingsHighlighter();
979 BooleanQuery.Builder query = new BooleanQuery.Builder();
980 query.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD);
981 query.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD);
982 Map<String,String[]> snippets = highlighter.highlightFields(new String[] { "title", "body" }, query.build(), searcher, new int[] { 0 }, new int[] { 1, 2 });
983 String titleHighlight = snippets.get("title")[0];
984 String bodyHighlight = snippets.get("body")[0];
985 assertEquals("This is a <b>test</b>. ", titleHighlight);
986 assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight);
987 ir.close();
988 dir.close();
989 }
990
991 public void testEncode() throws Exception {
992 Directory dir = newDirectory();
993 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
994 iwc.setMergePolicy(newLogMergePolicy());
995 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
996
997 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
998 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
999 Field body = new Field("body", "", offsetsType);
1000 Document doc = new Document();
1001 doc.add(body);
1002
1003 body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
1004 iw.addDocument(doc);
1005
1006 IndexReader ir = iw.getReader();
1007 iw.close();
1008
1009 IndexSearcher searcher = newSearcher(ir);
1010 PostingsHighlighter highlighter = new PostingsHighlighter() {
1011 @Override
1012 protected PassageFormatter getFormatter(String field) {
1013 return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
1014 }
1015 };
1016 Query query = new TermQuery(new Term("body", "highlighting"));
1017 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
1018 assertEquals(1, topDocs.totalHits);
1019 String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
1020 assertEquals(1, snippets.length);
1021 assertEquals("Just a test <b>highlighting</b> from <i>postings</i>. ", snippets[0]);
1022
1023 ir.close();
1024 dir.close();
1025 }
1026
1027
1028 public void testGapSeparator() throws Exception {
1029 Directory dir = newDirectory();
1030
1031 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
1032 iwc.setMergePolicy(newLogMergePolicy());
1033 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
1034
1035 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
1036 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
1037 Document doc = new Document();
1038
1039 Field body1 = new Field("body", "", offsetsType);
1040 body1.setStringValue("This is a multivalued field");
1041 doc.add(body1);
1042
1043 Field body2 = new Field("body", "", offsetsType);
1044 body2.setStringValue("This is something different");
1045 doc.add(body2);
1046
1047 iw.addDocument(doc);
1048
1049 IndexReader ir = iw.getReader();
1050 iw.close();
1051
1052 IndexSearcher searcher = newSearcher(ir);
1053 PostingsHighlighter highlighter = new PostingsHighlighter() {
1054 @Override
1055 protected char getMultiValuedSeparator(String field) {
1056 assert field.equals("body");
1057 return '\u2029';
1058 }
1059 };
1060 Query query = new TermQuery(new Term("body", "field"));
1061 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
1062 assertEquals(1, topDocs.totalHits);
1063 String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
1064 assertEquals(1, snippets.length);
1065 assertEquals("This is a multivalued <b>field</b>\u2029", snippets[0]);
1066
1067 ir.close();
1068 dir.close();
1069 }
1070
1071
1072 public void testObjectFormatter() throws Exception {
1073 Directory dir = newDirectory();
1074 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
1075 iwc.setMergePolicy(newLogMergePolicy());
1076 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
1077
1078 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
1079 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
1080 Field body = new Field("body", "", offsetsType);
1081 Document doc = new Document();
1082 doc.add(body);
1083
1084 body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
1085 iw.addDocument(doc);
1086
1087 IndexReader ir = iw.getReader();
1088 iw.close();
1089
1090 IndexSearcher searcher = newSearcher(ir);
1091 PostingsHighlighter highlighter = new PostingsHighlighter() {
1092 @Override
1093 protected PassageFormatter getFormatter(String field) {
1094 return new PassageFormatter() {
1095 PassageFormatter defaultFormatter = new DefaultPassageFormatter();
1096
1097 @Override
1098 public String[] format(Passage passages[], String content) {
1099
1100
1101 return new String[] {"blah blah", defaultFormatter.format(passages, content).toString()};
1102 }
1103 };
1104 }
1105 };
1106
1107 Query query = new TermQuery(new Term("body", "highlighting"));
1108 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
1109 assertEquals(1, topDocs.totalHits);
1110 int[] docIDs = new int[1];
1111 docIDs[0] = topDocs.scoreDocs[0].doc;
1112 Map<String,Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[]{"body"}, query, searcher, docIDs, new int[] {1});
1113 Object[] bodySnippets = snippets.get("body");
1114 assertEquals(1, bodySnippets.length);
1115 assertTrue(Arrays.equals(new String[] {"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0]));
1116
1117 ir.close();
1118 dir.close();
1119 }
1120
1121 public void testFieldSometimesMissingFromSegment() throws Exception {
1122 Directory dir = newDirectory();
1123 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
1124 iwc.setMergePolicy(newLogMergePolicy());
1125 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
1126
1127 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
1128 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
1129 Field body = new Field("body", "foo", offsetsType);
1130 Document doc = new Document();
1131 doc.add(body);
1132 iw.addDocument(doc);
1133
1134
1135 iw.commit();
1136 doc = new Document();
1137 doc.add(new StoredField("body", "foo"));
1138 iw.addDocument(doc);
1139
1140 IndexReader ir = DirectoryReader.open(iw.w, true);
1141 iw.close();
1142
1143 IndexSearcher searcher = new IndexSearcher(ir);
1144 PostingsHighlighter highlighter = new PostingsHighlighter();
1145 Query query = new MatchAllDocsQuery();
1146 TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
1147 assertEquals(2, topDocs.totalHits);
1148 String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
1149 assertEquals(2, snippets.length);
1150 assertEquals("foo", snippets[0]);
1151 assertNull(snippets[1]);
1152 ir.close();
1153 dir.close();
1154 }
1155
1156 public void testCustomScoreQueryHighlight() throws Exception{
1157 Directory dir = newDirectory();
1158 IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
1159 iwc.setMergePolicy(newLogMergePolicy());
1160 RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
1161
1162 FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
1163 offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
1164 Field body = new Field("body", "", offsetsType);
1165 Document doc = new Document();
1166 doc.add(body);
1167
1168 body.setStringValue("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy");
1169 iw.addDocument(doc);
1170
1171 IndexReader ir = iw.getReader();
1172 iw.close();
1173
1174 TermQuery termQuery = new TermQuery(new Term("body", "very"));
1175 PostingsHighlighter highlighter = new PostingsHighlighter();
1176 CustomScoreQuery query = new CustomScoreQuery(termQuery);
1177
1178 IndexSearcher searcher = newSearcher(ir);
1179 TopDocs hits = searcher.search(query, 10);
1180 assertEquals(1, hits.totalHits);
1181
1182 String snippets[] = highlighter.highlight("body", query, searcher, hits);
1183 assertEquals(1, snippets.length);
1184 assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <b>very</b> long in the middle and finally ends with another reference to Kennedy",
1185 snippets[0]);
1186
1187 ir.close();
1188 dir.close();
1189 }
1190 }